How to extract data from a document using C#

82

How to extract data from a document using C#:

/// <summary>
/// Loads the sample invoice PDF and uses regular-expression searches via
/// <c>GcPdfDocument.FindText</c> to print the invoice total and the first
/// e-mail address found in the document to the console.
/// </summary>
void ExtractRegex()
{
    using (var fs = File.OpenRead(Path.Combine("Resources", "PDFs", "InvoiceDemo.pdf")))
    {
        // Load the sample PDF document.
        GcPdfDocument doc = new GcPdfDocument();
        doc.Load(fs);

        // Find the invoice total: the word "Total", a CRLF line break, then a
        // dollar sign followed by an optionally signed decimal number.
        // NOTE(review): the trailing arguments (72, 72, true, true) presumably set
        // the resolution and enable regex matching and near-text capture —
        // confirm against the FindTextParams constructor documentation.
        FindTextParams searchParam1 = new FindTextParams(@"(Total)\r\n\$([-+]?[0-9]*\.?[0-9]+)", false, false, 72, 72, true, true);
        IList<FoundPosition> pos1 = doc.FindText(searchParam1);
        if (pos1.Count > 0)
        {
            FoundPosition totalMatch = pos1[0];
            // Skip past the first matched fragment (presumably the "Total" label line)
            // and keep whatever follows it in the near text, minus leading whitespace.
            int amountStart = totalMatch.PositionInNearText + totalMatch.TextMapFragment[0].Length;
            string totalAmount = totalMatch.NearText.Substring(amountStart).TrimStart();
            Console.WriteLine("Total amount found using regex in FindText method: " + totalAmount);
        }
        else
        {
            // Without this guard, indexing an empty result list would throw.
            Console.WriteLine("Total amount not found using regex in FindText method.");
        }

        // Find the customer's e-mail address in the invoice.
        FindTextParams searchParam2 = new FindTextParams(@"[a-zA-Z0-9+_.-]+@[a-zA-Z0-9.-]+", false, false, 72, 72, true, true);
        IList<FoundPosition> pos2 = doc.FindText(searchParam2);
        if (pos2.Count > 0)
        {
            FoundPosition emailMatch = pos2[0];
            // The match itself is the e-mail: take exactly the first fragment's length
            // starting at the match position inside the near text.
            string foundEmail = emailMatch.NearText.Substring(emailMatch.PositionInNearText, emailMatch.TextMapFragment[0].Length);
            Console.WriteLine("Email Address found using regex in FindText method: " + foundEmail);
        }
        else
        {
            Console.WriteLine("Email Address not found using regex in FindText method.");
        }
    }
}

Comments

Submit
0 Comments